################################################################################ 
# REPEATED REDUCED FORM REGRESSIONS: EXCLUSION RESTRICTION (DV: COUNT VICTIMS) #
################################################################################
# Author: Kasia Nalewajko
# First created: 15 November 2023
# Replicated: 10 June 2024

rm(list = ls())

# LOAD PACKAGES -----------------------------------------------------------

if (!require("dplyr")) install.packages("dplyr")
if (!require("modelsummary")) install.packages("modelsummary")
if (!require("estimatr")) install.packages("estimatr")
if (!require("ggplot2")) install.packages("ggplot2")

# LOAD DATA ---------------------------------------------------------------

load("./00 SUBMITTED/00 APSR final/04 replication_files/01 data/main.Rda")

# RUN THE MODELS -------

insurgents <- sort(unique(main$FFIFFCgentile))
insurgents <- insurgents[insurgents>2]

# Create empty containers
outcome <- rep(NA, 1*length(insurgents))
estimate <- rep(NA, 1*length(insurgents))
number_insurgents <- rep(NA, 1*length(insurgents))
nobs <- rep(NA, 1*length(insurgents))
p <- rep(NA, 1*length(insurgents))
CI_lower <- rep(NA, 1*length(insurgents))
CI_upper <- rep(NA, 1*length(insurgents))
models_list <- list()

# Loop over outcomes
for (i in 1:length(insurgents)) {
  
  models_list[[i]] <- summary(lm_robust(formula = log(all_jewish_victims+1) ~ log(mpf1000+1) + log(pop1936+1) + synagogues_dummy + log(collabos_antijew1000+1) + log(DHI_milipol_sum1942+1) + area_sqkm  + longitude + longitudesq + latitude + latitudesq + zone5,
                                        clusters = id_bureau,
                                        fixed_effects = ar_name,
                                        data = subset(main, FFIFFCgentile <= insurgents[i])))
  
  outcome[[i]] <- "all_jewish_victims"
  number_insurgents[[i]] <- insurgents[i]
  estimate[[i]] <- models_list[[i]][["coefficients"]][1,1]
  nobs[[i]] <- models_list[[i]][["nobs"]]
  p[[i]] <- models_list[[i]][["coefficients"]][1,4]
  CI_lower[[i]] <- models_list[[i]][["coefficients"]][1,5]
  CI_upper[[i]] <- models_list[[i]][["coefficients"]][1,6]
  
  print(i)
}

# PUT TOGETHER DATA FRAME FOR PLOTTING -------

models_df_all_jewish_victims <- data.frame(
  outcome = outcome,
  number_insurgents = number_insurgents,
  estimate = estimate,
  nobs = nobs,
  p = p,
  CI_lower = CI_lower,
  CI_upper = CI_upper)


models_df_all_jewish_victims <- models_df_all_jewish_victims[!is.na(models_df_all_jewish_victims$estimate),]
models_df_all_jewish_victims$nobs_prop <- models_df_all_jewish_victims$nobs/max(models_df_all_jewish_victims$nobs)

# PLOT ---------------------------------------------------------------

models_df_all_jewish_victims %>% 
  filter(outcome == "all_jewish_victims") %>% 
  ggplot() +
  geom_hline(aes(yintercept = 0.05), linetype = 2, color = "gray20") +
  geom_vline(aes(xintercept = 29), linetype = 1, color = "gray20") +
  geom_vline(aes(xintercept = 49), linetype = 1, color = "gray20") +
  geom_text(aes(x=29, label="25th percentile\n", y=0.5), colour="blue", angle=90, text=element_text(size=11)) +
  geom_text(aes(x=49, label="median\n", y=0.5), colour="blue", angle=90, text=element_text(size=11)) +
  geom_point(aes(x = number_insurgents, 
                 y = p), 
             size = 1.75,
             color = "black"
  ) +
  theme_bw() +
  coord_cartesian(
    xlim = c(1, 100)
  )+
  labs(
    x = "Maximum number insurgents in county",
    y = "P-values"
  )

# SAVE ---------------------------------------------------------------

ggsave(
  "A5_reduced_countvictimsP.png",
  plot = last_plot(),
  path = "./00 SUBMITTED/00 APSR final/02 supplementary_materials/figures",
  width = 25,
  height = 15,
  units = "cm",
  dpi = 300
)


